log using "/home/dcohen/work/202410_rents_replication/logs/01-02.log", replace text

********************************************************************************
*** SETUP
********************************************************************************

* Run under Stata 15 semantics, empty memory, and allow up to 7500 variables
version 15
clear all
set maxvar 7500

* Directory globals: longpath is read from (source .dta files),
* savepath is written to (processed .dta files)
global longpath "/home/dcohen/soep-data/soep.v35/stata_de+en/"
global savepath "/home/dcohen/work/202410_rents_replication//dat/proc-data/"

********************************************************************************
*** Merge Data Sets
********************************************************************************

* Start from the regional indicators file (REGIONL) and retain only the
* identifiers and geography variables listed here
use "$longpath/regionl.dta", clear
keep hid syear bula gtyp bik gkz kkz plz kr_kkz kr_kkz_rek

* Bring in every MICROM variable, matching one-to-one on household and year
merge 1:1 hid syear using "$longpath/microm.dta"

* Retain matched records only, restricted to survey years from 2005 onward
keep if _merge == 3
keep if syear >= 2005
drop _merge

* Drop irrelevant variables
* Every wildcard pattern below is handed to one single drop command. Some
* patterns overlap (ha_mfi* and ha_mfi_* both appear in the list).
* NOTE(review): presumably this has to stay a single command, because drop
* stops with an error on a pattern that matches no variable -- confirm before
* splitting the list into several drop statements.
drop ha_mfi* ha_mgs* ha_mlo* ha_mlt* ha_mmi* ha_mmm_p_* ha_mph_* ha_mpi_* ///
     ha_mpm_* ha_mps_* ha_mty_* ha_tfi_* ha_tfr_* ha_tgl_* ha_tgw_* ha_thf_* ///
     ha_tko_* ha_tme_* ha_tmo_* ha_wev_* ha_dug_* ha_mfi_* ha_mgm_* ha_mlp_p_* ///
     mz_mgm_* mz_mgs_* mz_mlp_p_* mz_mlt_* mz_mmi_* mz_mmm_p_* mz_mpm_* mz_mty_* ///
     mz_wev_* ///
     p8_mgm_* p8_mgs_* p8_mlp_p_* p8_mlt_* p8_mmi_* p8_mmm_p_* p8_mpm_* p8_mty_* ///
     p8_wev_* p8_dug_* ///
     st_mgm_* st_mgs_* st_mlp_p_* st_mlt_* st_mmi_* st_mmm_* st_mpm_* st_mty_* ///
     st_wev_* st_dug_*
     
* Drop variables at the house and street level; keep neighborhoods (mz, p8)
* All remaining st_ variables are removed outright.
drop st_*
* Of the remaining ha_ variables only ha_mso_k_status survives: the full ha_
* list is expanded, the exclusion is subtracted from it, and each variable
* left in the list is dropped in turn.
unab ha_vars: ha_*
loc exclude "ha_mso_k_status"
foreach var in `:list ha_vars - exclude' {
  drop `var'
}

* Drop variables at the neighborhood level with high missingness
* NOTE(review): mz_reg* is listed twice below. The second occurrence sits in
* the p8_ row, so p8_reg* may have been intended -- confirm against the data
* (drop fails if a pattern matches no variable).
drop mz_ewa* mz_mba* mz_reg* ///
     p8_ewa* p8_mba* mz_reg* ///
     id_* kr_id ///
     p8_p8t_w_flaeche t_mz_kkr_w_summe t_st_kkr_w_summe gk_reg_k_rst17

* Recode missings
* Negative codes are turned into Stata missing values in both variable ranges:
* -1, -2, -3 become "." ; -5 becomes ".a" ; -8 becomes ".b"
global regionl_vars = "gtyp-kr_kkz_rek"
mvdecode $regionl_vars, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)

global microm_vars = "micyear-p8_erp_p_ewlose "
mvdecode $microm_vars, mv(-1 -2 -3 = . \ -5 = .a\ -8 = .b)

* Diagnostic only: tabulate the exact zeros of each numeric MICROM variable.
* confirm numeric variable accepts every numeric storage type (byte, int,
* long, float, double); a hand-written list of type names would skip float
* variables. String variables are skipped.
foreach X of varlist $microm_vars {
  capture confirm numeric variable `X'
  if (_rc == 0) {
    tab `X' if `X' == 0
  }
}

* Remove records that have a value in neither plz nor pl_id, then fill the
* remaining gaps in plz from pl_id
drop if missing(plz) & missing(pl_id)
replace plz = pl_id if missing(plz)

* Give the household and year identifiers their project-wide names
rename (hid syear) (hh_id year)

********************************************************************************
*** Recodes
********************************************************************************

* Harmonize MZ/P8 Variables
* Stubs that are read under both the mz_ and the p8_ prefix
global mzp8_vars "alq_p_quote kkr_i_proeinwbrd kkr_i_proeinwwo "
global mzp8_vars "$mzp8_vars kkr_w_proeinw met_p_afrika met_p_asien "
global mzp8_vars "$mzp8_vars met_p_balkan met_p_deutschl met_p_griechen "
global mzp8_vars "$mzp8_vars met_p_islam met_p_italien met_p_osteurop met_p_spaetaus "
global mzp8_vars "$mzp8_vars met_p_spanport met_p_tuerkei met_p_uebrige  mlp_k_statuslp "
global mzp8_vars "$mzp8_vars mmo_k_fluktu mmo_k_saldo mmo_k_volumen "
global mzp8_vars "$mzp8_vars mmm_w_mighaush"

* Stubs that are read under the p8_ prefix only
global p8_vars "muc_p_akademik mwo_p_eigentum mwo_p_mieter "
global p8_vars "$p8_vars erp_p_ewtaetige erp_p_ewlose"

* nbh_<stub> starts as a clone of mz_<stub>; for years after 2009 its values
* are overwritten with p8_<stub>
foreach stub in $mzp8_vars {
  capture drop nbh_`stub'
  clonevar nbh_`stub' = mz_`stub'
  replace  nbh_`stub' = p8_`stub' if year > 2009
}

* nbh_<stub> is a straight clone of p8_<stub>
foreach stub in $p8_vars {
  capture drop nbh_`stub'
  clonevar nbh_`stub' = p8_`stub'
}

* Select relevant variables: identifiers, geography, house status, and all
* harmonized neighborhood variables
keep hh_id year micyear ///
     bula kkz gkz plz ///
     pl_id mz_id p8_id gk_id ///
     gtyp kr_kkz_rek ///
     ha_mso_k_status nbh_*
     
/* 
Notes: Variable availability by year

2014-2018	nbh_mwo_p_eig~m         Prozentanteil HH mit Wohneigentum
2014-2018	nbh_mwo_p_mie~r         Prozentanteil Mieter-HH
2014-2018	nbh_erp_p_ewt~e         Anteil der erwerbstaetigen Personen an allen Erwerbspersonen in %
2014-2018	nbh_erp_p_ewl~e         Anteil der erwerbslosen Personen an allen Erwerbspersonen in %
2013-2018       nbh_muc_p_aka~k         Prozentanteil der ueber 25-jaehrigen Akademiker an allen Personen
2010, 2012-2018	nbh_mlp_k_statuslp	microm Lebensphase nach soziooekonomischem Status
2010-2018       nbh_mmm_w_mig~h         Anzahl der Migrantenhaushalte
2010-2018       nbh_kkr_w_pro~w         Kaufkraft pro Einwohner
2009-2018       nbh_alq_p_quote         Arbeitslosenquote
2006-2018	nbh_met_p_*		Anteil der Einwanderer aus *
2005-2018       nbh_mmo_k_flu~u         Fluktuation
2005-2018       nbh_mmo_k_saldo 	Umzugssaldo
2005-2018       nbh_mmo_k_vol~n         Umzugsvolumen
2005-2018	ha_mso_k_status         Status
*/

* Recode percentages * 100 to proportions
* Only non-missing cells are rescaled. Arithmetic on an extended missing value
* (.a, .b) returns the plain system missing ".", which would erase the
* missing-value codes assigned by mvdecode earlier in this script.
foreach var of varlist *_p_* {
  replace `var' = `var' / 10000 if !missing(`var')
}

* Status irrespective of life cycle
* The 27 codes of nbh_mlp_k_statuslp are folded into three groups:
* codes 1, 4, ..., 25 give 1; codes 2, 5, ..., 26 give 2; codes 3, 6, ..., 27
* give 3. Any other value (including missing) yields missing.
gen nbh_mlp_k_status = ///
    cond(inlist(nbh_mlp_k_statuslp, 1, 4, 7, 10, 13, 16, 19, 22, 25), 1, ///
    cond(inlist(nbh_mlp_k_statuslp, 2, 5, 8, 11, 14, 17, 20, 23, 26), 2, ///
    cond(inlist(nbh_mlp_k_statuslp, 3, 6, 9, 12, 15, 18, 21, 24, 27), 3, .)))
     
     
********************************************************************************
*** Save data
********************************************************************************

* Household-level (N = 291384)
saveold "$savepath/microm_2005_2018.dta", replace version(12)

* House-level (N = 210631): remove the household id, then collapse rows that
* have become identical
drop hh_id
duplicates drop
saveold "$savepath/microm_2005_2018_ha.dta", replace version(12)

* Neighborhood-level (N = 168654): additionally remove the ha_ variables and
* collapse identical rows again
drop ha_*
duplicates drop
saveold "$savepath/microm_2005_2018_nbh.dta", replace version(12)

log close
